# Point the session at the project folder so the relative data paths used below resolve.
# NOTE(review): this is a machine-specific absolute path, and the notebook resets the
# working directory when the chunk finishes (see the message printed below).
setwd("H:\\EB5205BarryWebAnalyticsCA\\nasa_source_code")
The working directory was changed to H:/EB5205BarryWebAnalyticsCA/nasa_source_code inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
library("arules")
# Load the July (training) sessions as an arules transactions object.
# The file is read in "single" format, using the session_id column as the
# transaction id and the webpage column as the item; duplicate items within a
# session are removed.
# NOTE(review): the reader reports "EOF within quoted string" for this file (see
# the warning printed below) -- some lines may be parsed incorrectly; confirm
# the quoting of the CSV.
nasa_transactions <- read.transactions(
  file = "nasa_data/sessionized_data/sessionize_date_july.csv",
  format = "single",
  sep = ",",
  cols = c("session_id", "webpage"),
  rm.duplicates = TRUE
)
EOF within quoted string
number of items read is not a multiple of the number of columns
# Mine association rules from the July transactions: minimum support 1%,
# minimum confidence 1%, and at least two items per rule (lhs + rhs).
rules <- apriori(
  nasa_transactions,
  parameter = list(support = 0.01, confidence = 0.01, minlen = 2)
)
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
0.01 0.1 1 none FALSE TRUE 5 0.01 2 10 rules FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 708
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[1614 item(s), 70824 transaction(s)] done [0.03s].
sorting and recoding items ... [64 item(s)] done [0.00s].
creating transaction tree ... done [0.02s].
checking subsets of size 1 2 3 4 done [0.01s].
writing ... [398 rule(s)] done [0.00s].
creating S4 object ... done [0.01s].
# Overview of the mined rule set: rule-length distribution and the range of
# support / confidence / lift / count across the rules.
summary(rules)
set of 398 rules
rule length distribution (lhs + rhs):sizes
2 3 4
218 168 12
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 2.000 2.000 2.482 3.000 4.000
summary of quality measures:
support confidence lift count
Min. :0.01008 Min. :0.04494 Min. : 0.5204 Min. : 714.0
1st Qu.:0.01154 1st Qu.:0.18770 1st Qu.: 1.5688 1st Qu.: 817.5
Median :0.01382 Median :0.30621 Median : 2.8995 Median : 979.0
Mean :0.01738 Mean :0.38706 Mean : 5.3790 Mean :1230.8
3rd Qu.:0.01845 3rd Qu.:0.52620 3rd Qu.: 6.3766 3rd Qu.:1307.0
Max. :0.06021 Max. :0.99471 Max. :46.1623 Max. :4264.0
mining info:
data ntransactions support confidence
nasa_transactions 70824 0.01 0.01
# inspect(rules)  # left disabled: would print every mined rule
# Bar chart of the 20 most frequent pages in the training transactions,
# shown as absolute counts.
itemFrequencyPlot(nasa_transactions,topN=20,type="absolute")
Interesting findings: 1. The film Apollo 13, about the eventful 1970 mission to the Moon, was released at the end of June 1995, which sparked a lot of interest in the Apollo 13 mission. Incidentally, Apollo 13 launched from KSC, so many people were presumably looking up the history of the mission on this site, since the internet was still in its nascent state and Wikipedia (and other open internet-based encyclopedias) had not yet been launched. References: 1. https://en.wikipedia.org/wiki/Apollo_13_(film)
# Read the test data (August sessions).
# The working directory is set again here because the notebook resets it at the
# end of each chunk (see the message printed below).
setwd("H:\\EB5205BarryWebAnalyticsCA\\nasa_source_code")
The working directory was changed to H:/EB5205BarryWebAnalyticsCA/nasa_source_code inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Load the August sessions, keep only the session id and the page visited,
# and preview the first rows.
nasa_aug_file <- read.csv(
  file = "nasa_data/sessionized_data/sessionize_date_aug.csv"
)
nasa_aug_file <- nasa_aug_file[, c("session_id", "webpage")]
head(nasa_aug_file)
# Fire every rule whose antecedent is exactly the single item `item` and
# return the consequents as one comma-separated string ("" when none fire).
# Rules with multi-item antecedents are never matched.
#
# item    : the item (page) to use as the rule antecedent.
# rulesDF : data frame of rules whose first column, named `rules`, holds the
#           rule text in the form "{lhs} => {rhs}".
makepreds <- function(item, rulesDF) {
  lhs_pattern <- paste0("{", item, "} =>")
  hits <- grep(lhs_pattern, rulesDF$rules, fixed = TRUE)
  fired <- rulesDF[hits, 1]
  # Strip everything up to and including the opening brace of the rhs ...
  rhs_tail <- sub(".*=> \\{", "", fired)
  # ... then the closing brace, leaving only the consequent item(s).
  rhs_items <- sub("\\}", "", rhs_tail)
  gsub(" ", "", toString(rhs_items))
}
# Flatten the mined rules into a data frame; makepreds() matches against its
# `rules` text column.
rulesDF <- as(rules, "data.frame")
# Attach to every test row the pages predicted from the page on that row.
# The prediction depends only on the page, so the rule lookup is run once per
# distinct page and mapped back to the rows, rather than once per row through
# apply() on the data frame.
nasa_aug_file$preds <- local({
  test_pages <- as.character(nasa_aug_file$webpage)
  distinct_pages <- unique(test_pages)
  page_preds <- vapply(
    distinct_pages, makepreds, character(1),
    rulesDF = rulesDF, USE.NAMES = FALSE
  )
  # match() (rather than name lookup) keeps NA pages aligned with their entry.
  page_preds[match(test_pages, distinct_pages)]
})
head(nasa_aug_file)
# Rank the whole rule set by lift, strongest first, and list the 20 best rules.
top.lift <- sort(rules, by = "lift", decreasing = TRUE, na.last = NA)
inspect(head(top.lift, n = 20))
lhs rhs support confidence lift count
[1] {/shuttle/missions/51-l/mission-51-l.html} => {/shuttle/missions/51-l/51-l-info.html} 0.01022252 0.4868863 46.16230 724
[2] {/shuttle/missions/51-l/51-l-info.html} => {/shuttle/missions/51-l/mission-51-l.html} 0.01022252 0.9692102 46.16230 724
[3] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/images/,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01061787 0.9947090 26.26744 752
[4] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/movies/,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01049079 0.9893475 26.12586 743
[5] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/sounds/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01349825 0.9825283 25.94578 956
[6] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/movies/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01551734 0.9786287 25.84280 1099
[7] {/history/apollo/apollo-13/images/,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01084378 0.9770992 25.80241 768
[8] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/images/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01539026 0.9740840 25.72279 1090
[9] {/history/apollo/apollo-13/movies/,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01073083 0.9706258 25.63147 760
[10] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/images/} 0.01061787 0.4670807 25.23305 752
[11] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/images/} 0.01084378 0.4663024 25.19100 768
[12] {/history/apollo/apollo-13/docs/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01047667 0.9428208 24.89722 742
[13] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/docs/} 0.01047667 0.2766592 24.89722 742
[14] {/history/apollo/apollo-13/,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01287699 0.9258883 24.45008 912
[15] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/movies/} 0.01049079 0.4614907 24.35515 743
[16] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/movies/} 0.01073083 0.4614451 24.35275 760
[17] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/images/} 0.01539026 0.4467213 24.13317 1090
[18] {/history/apollo/apollo-13/images/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01683045 0.9092296 24.01017 1192
[19] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/images/} 0.01683045 0.4444444 24.01017 1192
[20] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/sounds/} 0.01349825 0.3918033 23.90101 956
# Split a comma-separated basket string into its distinct items.
#
# itemstrg : a string of items separated by commas; spaces are ignored.
# Returns a character vector of the items in first-seen order, duplicates
# removed. Only the first element of `itemstrg` is used.
uniqueitems <- function(itemstrg) {
  no_spaces <- gsub(" ", "", itemstrg)
  pieces <- strsplit(no_spaces, ",")
  unique(pieces[[1]])
}
# Count how many of a session's predictions are in the basket of pages
# already seen in that session.
#
# preds  : list-like whose first element is the vector of predicted pages.
# baskID : session_id of the session whose basket is looked up.
# Returns the number of predicted pages present in the basket; 0 when the
# session has no basket.
# Caution : refers to "baskets" as a global (columns session_id and webpage).
checkpreds <- function(preds, baskID) {
  plist <- preds[[1]]
  # baskets is keyed by session_id; it has no basketID column, so filtering on
  # that name selects nothing and the lookup fails.
  rows <- which(baskets$session_id == baskID)
  if (length(rows) == 0) {
    return(0)
  }
  blist <- baskets[rows, "webpage"][[1]]
  sum(plist %in% blist)
}
# Count all predictions made, ignoring empty placeholders.
#
# predlist : vector of predicted pages; "" entries stand for "no prediction".
# Returns the number of non-empty entries. Checking only the first element is
# not enough: c("", "/a") holds one real prediction, and c("/a", "", "/b")
# holds two, not three.
countpreds <- function(predlist) {
  sum(nzchar(as.character(predlist)))
}
# Extract the unique predictions for each test session - predictions.
# All per-row prediction strings of a session are joined with "," and then
# split into distinct items.
userpreds <- aggregate(
  preds ~ session_id, data = nasa_aug_file, paste, collapse = ","
)
# lapply() always yields a list column (one character vector per session).
# apply() would collapse the result to a matrix or vector whenever every
# session happens to have the same number of items.
userpreds$preds <- lapply(userpreds$preds, uniqueitems)
# Extract the unique pages visited in each test session - actual.
baskets <- aggregate(
  webpage ~ session_id, data = nasa_aug_file, paste, collapse = ","
)
baskets$webpage <- lapply(baskets$webpage, uniqueitems)
baskets
# De-duplicate the items of a basket given as one comma-separated string.
#
# itemstrg : comma-separated item string (spaces are stripped first).
# Returns the distinct items as a character vector, keeping first occurrences.
uniqueitems <- function(itemstrg) {
  tokens <- strsplit(gsub(" ", "", itemstrg), ",")[[1]]
  tokens[!duplicated(tokens)]
}
# Number of predicted pages that appear in the basket of the given session.
#
# preds  : list-like; its first element is the vector of predicted pages.
# baskID : session_id used to select the basket.
# Caution : reads the global data frame "baskets" (session_id, webpage).
checkpreds <- function(preds, baskID) {
  predicted <- preds[[1]]
  session_rows <- baskets$session_id == baskID
  visited <- baskets[session_rows, "webpage"][[1]]
  # One hit per predicted page found among the pages visited in the session.
  as.numeric(sum(predicted %in% visited))
}
# Count all predictions made, skipping empty-string placeholders.
#
# predlist : vector of predicted pages for one session; an entry of "" means a
#            row produced no prediction.
# Returns how many entries are non-empty. An empty string can sit anywhere in
# the vector, so every element is tested rather than only the first one.
countpreds <- function(predlist) {
  sum(nzchar(as.character(predlist)))
}
# Count how many unique predictions are correct, i.e. the predicted page is in
# the basket of pages of the same session. Each row of userpreds is passed to
# checkpreds() as (preds, session_id).
correctpreds = sum(apply(userpreds,1,function(X) checkpreds(X["preds"],X["session_id"])))
# Count the total number of unique predictions made across all sessions.
totalpreds = sum(apply(userpreds,1,function(X) countpreds(X["preds"][[1]])))
# Precision as a percentage: correct predictions / all predictions made.
# NOTE(review): yields NaN when totalpreds is 0.
precision = correctpreds*100/totalpreds
cat("precision=", precision, "corr=",correctpreds,"total=",totalpreds)
library(arulesViz)
# plot(rules)  # static scatter plot, left disabled
# Interactive plot of the rules. plotly_arules() is deprecated in favour of
# plot(); the "htmlwidget" engine renders the interactive version.
plot(rules, engine = "htmlwidget")
'plotly_arules' is deprecated.
Use 'plot' instead.
See help("Deprecated")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Graph visualisation of the mined rules. The rule set is larger than the
# plot's limit, so only the best 100 rules by support are drawn (see the
# message printed below).
plot(rules, method="graph")
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)
# Same graph visualisation with grey node and edge colours and no transparency.
# Again limited to the best 100 rules by support (see the message printed below).
plot(rules, method="graph",nodeCol=grey.colors(10),edgeCol=grey(.7),alpha=1)
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)
# Matrix visualisation of the rules. The numbered antecedent (LHS) and
# consequent (RHS) itemsets used by the plot are printed to the console below.
plot(rules, method="matrix")
Itemsets in Antecedent (LHS)
[1] "{/shuttle/missions/51-l/51-l-info.html}"
[2] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/images/,/history/apollo/apollo.html}"
[3] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/movies/,/history/apollo/apollo.html}"
[4] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/sounds/}"
[5] "{/shuttle/missions/51-l/mission-51-l.html}"
[6] "{/history/apollo/apollo-13/docs/}"
[7] "{/history/apollo/apollo-13/,/history/apollo/apollo-13/apollo-13.html}"
[8] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo.html}"
[9] "{/history/apollo/apollo-13/images/,/history/apollo/apollo.html}"
[10] "{/history/apollo/apollo-13/movies/,/history/apollo/apollo.html}"
[11] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html}"
[12] "{/history/apollo/apollo-13/apollo-13-info.html}"
[13] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo.html}"
[14] "{/history/apollo/apollo-13/sounds/}"
[15] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/movies/}"
[16] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/images/}"
[17] "{/history/apollo/apollo-13/}"
[18] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo.html,/history/history.html}"
[19] "{/history/apollo/apollo-13/images/}"
[20] "{/history/apollo/apollo-13/movies/}"
[21] "{/history/apollo/apollo-13/apollo-13.html,/history/history.html}"
[22] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo.html,/history/history.html}"
[23] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/images/,/history/apollo/apollo.html}"
[24] "{/shuttle/technology/sts-newsref/sts_asm.html}"
[25] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/movies/,/history/apollo/apollo.html}"
[26] "{/history/apollo/apollo-13/apollo-13-info.html,/history/history.html}"
[27] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/sounds/}"
[28] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html,/history/history.html}"
[29] "{/history/apollo/apollo-13/,/history/apollo/apollo-13/apollo-13-info.html}"
[30] "{/history/apollo/apollo-11/apollo-11.html,/history/apollo/apollo-13/apollo-13.html}"
[31] "{/history/apollo/apollo-1/apollo-1.html,/history/apollo/apollo-13/apollo-13.html}"
[32] "{/shuttle/missions/sts-73/mission-sts-73.html}"
[33] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/images/}"
[34] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/movies/}"
[35] "{/history/apollo/apollo-13/apollo-13.html,/shuttle/missions/missions.html}"
[36] "{/history/apollo/apollo-1/apollo-1.html}"
[37] "{/,/history/apollo/apollo-13/apollo-13.html}"
[38] "{/history/mercury/mercury.html}"
[39] "{/,/history/apollo/apollo.html}"
[40] "{/history/apollo/apollo-1/apollo-1.html,/history/apollo/apollo.html}"
[41] "{/history/apollo/apollo-13/apollo-13.html,/ksc.html}"
[42] "{/history/apollo/apollo.html,/shuttle/missions/missions.html}"
[43] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/images/}"
[44] "{/history/apollo/apollo.html,/ksc.html}"
[45] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/movies/}"
[46] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo.html}"
[47] "{/history/apollo/apollo-11/apollo-11.html,/history/apollo/apollo.html}"
[48] "{/shuttle/missions/missions.html,/shuttle/missions/sts-71/images/images.html}"
[49] "{/shuttle/resources/orbiters/discovery.html}"
[50] "{/shuttle/missions/sts-78/mission-sts-78.html}"
[51] "{/history/apollo/apollo-13/apollo-13.html}"
[52] "{/shuttle/missions/missions.html,/shuttle/missions/sts-69/mission-sts-69.html}"
[53] "{/shuttle/countdown/,/shuttle/countdown/countdown.html}"
[54] "{/,/history/history.html}"
[55] "{/history/history.html,/ksc.html}"
[56] "{/history/apollo/apollo-11/apollo-11.html}"
[57] "{/history/history.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[58] "{/history/apollo/apollo.html}"
[59] "{/shuttle/missions/sts-69/mission-sts-69.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[60] "{/shuttle/missions/sts-67/mission-sts-67.html}"
[61] "{/shuttle/resources/orbiters/atlantis.html}"
[62] "{/shuttle/missions/sts-70/movies/movies.html}"
[63] "{/history/apollo/apollo.html,/history/history.html}"
[64] "{/shuttle/missions/sts-69/mission-sts-69.html}"
[65] "{/shuttle/missions/sts-70/images/images.html}"
[66] "{/,/shuttle/missions/sts-70/movies/movies.html}"
[67] "{/,/shuttle/missions/sts-70/images/images.html}"
[68] "{/ksc.html,/shuttle/missions/sts-70/images/images.html}"
[69] "{/shuttle/missions/sts-70/mission-sts-70.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[70] "{/shuttle/missions/sts-70/mission-sts-70.html,/shuttle/missions/sts-70/movies/movies.html}"
[71] "{/shuttle/technology/sts-newsref/stsref-toc.html}"
[72] "{/,/shuttle/countdown/liftoff.html}"
[73] "{/ksc.html,/shuttle/countdown/liftoff.html}"
[74] "{/shuttle/countdown/,/shuttle/missions/sts-70/movies/movies.html}"
[75] "{/shuttle/countdown/liftoff.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[76] "{/shuttle/missions/sts-70/images/images.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[77] "{/shuttle/countdown/liftoff.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[78] "{/history/history.html}"
[79] "{/shuttle/countdown/liftoff.html,/shuttle/missions/missions.html}"
[80] "{/shuttle/missions/missions.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[81] "{/shuttle/countdown/,/shuttle/missions/sts-70/mission-sts-70.html}"
[82] "{/shuttle/countdown/,/shuttle/missions/missions.html}"
[83] "{/facilities/lc39a.html}"
[84] "{/facts/about_ksc.html}"
[85] "{/shuttle/countdown/,/shuttle/missions/sts-70/images/images.html}"
[86] "{/ksc.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[87] "{/history/history.html,/shuttle/missions/missions.html}"
[88] "{/,/shuttle/missions/sts-70/mission-sts-70.html}"
[89] "{/shuttle/countdown/countdown.html}"
[90] "{/ksc.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[91] "{/shuttle/missions/sts-70/mission-sts-70.html}"
[92] "{/shuttle/missions/sts-71/images/images.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[93] "{/ksc.html,/shuttle/missions/missions.html}"
[94] "{/,/shuttle/missions/sts-71/mission-sts-71.html}"
[95] "{/,/shuttle/countdown/}"
[96] "{/shuttle/missions/missions.html}"
[97] "{/shuttle/countdown/,/shuttle/countdown/liftoff.html}"
[98] "{/,/shuttle/missions/missions.html}"
[99] "{/ksc.html,/shuttle/countdown/}"
[100] "{/,/ksc.html}"
[101] "{/shuttle/missions/sts-71/mission-sts-71.html}"
[102] "{/ksc.html,/shuttle/missions/sts-71/images/images.html}"
[103] "{/shuttle/missions/missions.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[104] "{/shuttle/missions/sts-71/movies/movies.html}"
[105] "{/shuttle/countdown/liftoff.html,/shuttle/missions/sts-70/images/images.html}"
[106] "{/,/shuttle/missions/sts-71/images/images.html}"
[107] "{/shuttle/countdown/,/shuttle/missions/sts-71/images/images.html}"
[108] "{/shuttle/countdown/lps/fr.html}"
[109] "{/shuttle/countdown/,/shuttle/missions/sts-71/mission-sts-71.html}"
[110] "{/shuttle/countdown/liftoff.html}"
[111] "{/shuttle/countdown/countdown.html,/shuttle/countdown/liftoff.html}"
[112] "{/shuttle/missions/sts-71/images/images.html}"
[113] "{/shuttle/countdown/}"
[114] "{/ksc.html}"
[115] "{/}"
Itemsets in Consequent (RHS)
[1] "{/}"
[2] "{/ksc.html}"
[3] "{/shuttle/missions/sts-71/images/images.html}"
[4] "{/shuttle/countdown/lps/fr.html}"
[5] "{/shuttle/missions/sts-71/movies/movies.html}"
[6] "{/shuttle/countdown/}"
[7] "{/shuttle/countdown/liftoff.html}"
[8] "{/shuttle/missions/missions.html}"
[9] "{/shuttle/missions/sts-71/mission-sts-71.html}"
[10] "{/shuttle/countdown/countdown.html}"
[11] "{/facts/about_ksc.html}"
[12] "{/facilities/lc39a.html}"
[13] "{/shuttle/missions/sts-70/mission-sts-70.html}"
[14] "{/shuttle/technology/sts-newsref/stsref-toc.html}"
[15] "{/shuttle/missions/sts-70/images/images.html}"
[16] "{/shuttle/missions/sts-69/mission-sts-69.html}"
[17] "{/shuttle/resources/orbiters/atlantis.html}"
[18] "{/shuttle/missions/sts-67/mission-sts-67.html}"
[19] "{/shuttle/missions/sts-70/movies/movies.html}"
[20] "{/history/history.html}"
[21] "{/history/apollo/apollo-11/apollo-11.html}"
[22] "{/shuttle/missions/sts-78/mission-sts-78.html}"
[23] "{/shuttle/resources/orbiters/discovery.html}"
[24] "{/history/apollo/apollo.html}"
[25] "{/history/apollo/apollo-13/apollo-13.html}"
[26] "{/history/mercury/mercury.html}"
[27] "{/shuttle/missions/sts-73/mission-sts-73.html}"
[28] "{/history/apollo/apollo-1/apollo-1.html}"
[29] "{/shuttle/technology/sts-newsref/sts_asm.html}"
[30] "{/history/apollo/apollo-13/}"
[31] "{/history/apollo/apollo-13/movies/}"
[32] "{/history/apollo/apollo-13/images/}"
[33] "{/history/apollo/apollo-13/sounds/}"
[34] "{/history/apollo/apollo-13/apollo-13-info.html}"
[35] "{/history/apollo/apollo-13/docs/}"
[36] "{/shuttle/missions/51-l/mission-51-l.html}"
[37] "{/shuttle/missions/51-l/51-l-info.html}"
# Parallel-coordinates plot of the rules.
# NOTE(review): reorder=TRUE presumably reorders the items to reduce line
# crossings -- confirm against the arulesViz documentation.
plot(rules, method="paracoord", control=list(reorder=TRUE))
# Recall-style figure: correct predictions as a percentage of the number of
# rows in the August test file.
# NOTE(review): correctpreds counts de-duplicated predictions per session,
# while nrow() counts every row of nasa_aug_file (repeat views included, if
# any) -- confirm this is the intended denominator.
total_relevant_instances = nrow(nasa_aug_file)
recall = correctpreds * 100 / total_relevant_instances
recall
[1] 33.11959
library(arulesViz)
package 'arulesViz' was built under R version 3.4.4
Loading required package: grid
# Graph of the 20 highest-lift rules (the same selection inspected earlier).
# top.lift holds the whole rule set, merely sorted by lift; passing all of it
# makes the graph method keep the best 100 rules by support instead, which
# discards the lift ranking.
plot(head(top.lift, 20), method="graph",nodeCol=grey.colors(10),edgeCol=grey(.7),alpha=1)
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)